Import Packages

library(tidyverse)
library(mapview)
library(sf)
library(tigris)
library(plotly)

# Ask tigris to hand back its geometries as sf objects.
options(tigris_class = "sf")

Set the root directory to the local folder containing the NYT data

# Folder holding the local copy of the NYT data; every read_csv() call in this
# file uses a path relative to it.
nyt_data_dir <- "~/Documents/GitHub/nytimes-covid-19-data" # change this path if necessary.

# When knitting, point knitr's root directory at the data folder. opts_knit is
# reached through the knitr namespace because none of the library() calls in
# this file attach knitr.
knitr::opts_knit$set(root.dir = nyt_data_dir)

# When running the chunks interactively in RStudio, the knitr option above is
# not applied, so the working directory is set directly as well.
setwd(nyt_data_dir)

Get State and County Geometries

Get State Geometries

# State boundary polygons from the tigris package (returned as sf because of
# the tigris_class option set earlier in this file).
# cb = FALSE is spelled out rather than using the reassignable alias `F`.
states <-
  states(cb = FALSE, progress_bar = FALSE) %>%
  # tigris data arrives in a different coordinate system; transform to
  # EPSG:4326 so it stays consistent with the rest of the spatial data.
  st_transform(4326)

Get CA County Geometries

# County boundary polygons for California from tigris, transformed to
# EPSG:4326 like the state polygons.
# cb = FALSE is spelled out rather than using the reassignable alias `F`.
ca_counties <-
  counties("CA", cb = FALSE, progress_bar = FALSE) %>%
  st_transform(4326)

Create List of Bay Area Counties

# Names of the nine counties treated as the Bay Area in this analysis; used
# below to filter both the NYT county rows and the county polygons.
bay_counties <- c(
  "Alameda", "Contra Costa", "Marin",
  "Napa", "San Francisco", "San Mateo",
  "Santa Clara", "Solano", "Sonoma"
)

By State

Read In NYT States Data

# Read the state-level NYT file (path is relative to the working directory)
# and convert the result to a plain data frame.
covid_nyt_state <- as.data.frame(read_csv("us-states.csv"))

# Show the most recent date present in the state data.
print(paste0("Latest date: ", max(covid_nyt_state[["date"]])))
## [1] "Latest date: 2020-05-20"

Add a death rate column (cumulative deaths divided by cumulative cases)

# Per-row ratio of deaths to cases for the state data.
covid_nyt_state[["deathrate"]] <- with(covid_nyt_state, deaths / cases)

Create df of states data filtered to latest date as sf objects

# Latest-date snapshot of the state data attached to the state polygons.
# The right join keeps every polygon in `states`, including any without a
# matching NYT row. local() keeps the intermediate values out of the global
# environment.
covid_nyt_by_state <- local({
  latest_date <- max(covid_nyt_state$date)
  latest_rows <- filter(covid_nyt_state, date == latest_date)
  with_geometry <- right_join(latest_rows, states, by = c("state" = "NAME"))
  select(st_as_sf(with_geometry), date:deathrate)
})

State Maps

Cases by State for latest date

# mapview(covid_nyt_by_state, zcol = "cases")

Deaths by State for latest date

# mapview(covid_nyt_by_state, zcol = "deaths")

Deaths/Cases by State for latest date

# mapview(covid_nyt_by_state, zcol = "deathrate")

By County

Read In NYT Counties Data

# Read the county-level NYT file (path is relative to the working directory).
covid_nyt_county <- read_csv(file = "us-counties.csv")

# Show the most recent date present in the county data.
print(paste0("Latest date: ", max(covid_nyt_county[["date"]])))
## [1] "Latest date: 2020-05-20"

Add a death rate column (cumulative deaths divided by cumulative cases)

# Per-row ratio of deaths to cases for the county data.
covid_nyt_county[["deathrate"]] <-
  covid_nyt_county[["deaths"]] / covid_nyt_county[["cases"]]

Filter to CA Counties

# Full time series for California counties only (all dates retained).
covid_nyt_ca_county <-
  covid_nyt_county %>%
  filter(state == "California")

Create df of CA counties data filtered to latest date as sf objects

# Latest-date snapshot of California county data attached to the county
# polygons. The right join keeps every polygon in `ca_counties`, including any
# county without a matching NYT row.
covid_nyt_ca_by_county <-
  covid_nyt_county %>%
  filter(state == "California") %>%
  # The cutoff date comes from the county table itself; the state table is a
  # separate file and its latest date is not guaranteed to match.
  filter(date == max(covid_nyt_county$date)) %>%
  right_join(ca_counties, by = c("county" = "NAME")) %>%
  st_as_sf() %>%
  select(date:deathrate)

CA County Maps

Cases by County for latest date

# mapview(covid_nyt_ca_by_county, zcol = "cases")

Deaths by County for latest date

# mapview(covid_nyt_ca_by_county, zcol = "deaths")

Deaths/Cases by County for latest date

# mapview(covid_nyt_ca_by_county, zcol = "deathrate")

Bay Area

Filter to Bay Area Counties

# Full time series for the Bay Area counties listed in `bay_counties`
# (all dates retained).
covid_nyt_bay_area <-
  covid_nyt_county %>%
  filter(state == "California", county %in% bay_counties)

Create df of Bay Area counties data filtered to latest date as sf objects

# Latest-date snapshot of Bay Area county data attached to the matching county
# polygons. The right join keeps every Bay Area polygon, including any county
# without a matching NYT row.
covid_nyt_bay_by_county <-
  covid_nyt_county %>%
  filter(state == "California") %>%
  filter(county %in% bay_counties) %>%
  # The cutoff date comes from the county table itself; the state table is a
  # separate file and its latest date is not guaranteed to match.
  filter(date == max(covid_nyt_county$date)) %>%
  right_join(
    ca_counties %>% filter(NAME %in% bay_counties),
    by = c("county" = "NAME")
  ) %>%
  st_as_sf() %>%
  select(date:deathrate)

Bay Area County Maps

Cases by County for latest date

# Interactive map of the Bay Area county polygons, coloured by the `cases`
# column of the latest-date snapshot.
mapview(covid_nyt_bay_by_county, zcol = "cases")

Deaths by County for latest date

# Interactive map of the Bay Area county polygons, coloured by the `deaths`
# column of the latest-date snapshot.
mapview(covid_nyt_bay_by_county, zcol = "deaths")

Deaths/Cases by County for latest date

# mapview(covid_nyt_bay_by_county, zcol = "deathrate")

Plots

US

First read in US data

# Read the national NYT file (path is relative to the working directory) and
# convert the result to a plain data frame.
covid_nyt_us <- as.data.frame(read_csv("us.csv"))

Simple ggplot of cases and deaths

# Static scatter plot of the national series: cases in blue, deaths in orange,
# both against date on a shared y axis.
ggplot(covid_nyt_us, aes(x = date)) +
  geom_point(aes(y = cases), color = "blue") +
  geom_point(aes(y = deaths), color = "orange") +
  labs(x = "Date", y = "Cumulative Counts")

Put ggplot into plotly for interactive graph

# Same national cases (blue) and deaths (orange) plot, stored in a variable so
# it can be passed to plotly.
cumulative_cases_timeseries <-
  covid_nyt_us %>%
  ggplot() +
  geom_point(aes(x = date, y = cases), color = "blue") +
  geom_point(aes(x = date, y = deaths), color = "orange") +
  labs(x = "Date", y = "Cumulative Counts")

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p = cumulative_cases_timeseries)

US by State

Cases

# Cases over time, one coloured line-plus-points series per state.
cumulative_cases_timeseries <-
  covid_nyt_state %>%
  ggplot(mapping = aes(x = date, y = cases, color = state)) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Cases", color = "State")

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p = cumulative_cases_timeseries)

Deaths

# Deaths over time, one coloured line-plus-points series per state.
cumulative_deaths_timeseries <-
  covid_nyt_state %>%
  ggplot(mapping = aes(x = date, y = deaths, color = state)) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Deaths", color = "State")

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p = cumulative_deaths_timeseries)

California By County

Cases

# Cases over time, one coloured line-plus-points series per California county.
cumulative_cases_timeseries <-
  covid_nyt_ca_county %>%
  ggplot(mapping = aes(x = date, y = cases, color = county)) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Cases", color = "County")

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p = cumulative_cases_timeseries)

Deaths

# Deaths over time, one coloured line-plus-points series per California county.
cumulative_deaths_timeseries <-
  covid_nyt_ca_county %>%
  ggplot(mapping = aes(x = date, y = deaths, color = county)) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Deaths", color = "County")

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p = cumulative_deaths_timeseries)

Bay Area By County

Cases

# Cases over time, one coloured line-plus-points series per Bay Area county.
cumulative_cases_timeseries <-
  covid_nyt_bay_area %>%
  ggplot(mapping = aes(x = date, y = cases, color = county)) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Cases", color = "County")

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p = cumulative_cases_timeseries)

Deaths

# Deaths over time, one coloured line-plus-points series per Bay Area county.
cumulative_deaths_timeseries <-
  covid_nyt_bay_area %>%
  ggplot(mapping = aes(x = date, y = deaths, color = county)) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Deaths", color = "County")

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p = cumulative_deaths_timeseries)